The goals of this analysis are to:
To answer these questions, 56 women with a laboratory-diagnosed sexually transmitted infection (STI)and/or BV-intermediate or positive at baseline, attended all subsequent visits (weeks 6 and 12), and had genital specimen stored to conduct the longitudinal microbiota and cytokine analysis were included. 16S rRNA gene sequencing and multiplex bead arrays were used to characterize the composition of vaginal microbial communities and soluble biomarkers of genital inflammation in cervicovaginal swabs and SoftCup fluids. Community state types were inferred using VALENCIA, a nearest centroid classification method for vaginal microbial communities based on composition. Linear regression analysis was used to compare genital cytokine concentrations with prevalent CSTs to test the hypothesis that vaginal microbiota dominated with non-Lactobacillus species would be associated with increased cytokine concentrations compared to communities dominated by Lactobacillus speciesPrincipal component analysis (PCA) was used to obtain summary measures for the multi-variable cytokine set and performed using the FactoMineR package (https://cran.r project.org/web/packages/FactoMineR/index.html) in R (www.r-project.org).
What is the distribution of bacteria in these women? is there any form of clustering?
Three major CSTs identified by VALENCIA
## CSTs custom color color pallete
CSTs_all <- c("I", "II", "III", "IV-A", "IV-B", "IV-C", "V")
CSTsPallete <- c("#7CAE00", "#ff0033", "#C77CFF", "#ff33cc", "#6699CC", "pink", "blue") # "purple", "orange"
CSTsPalleteNamed <- c("#7CAE00", "#ff0033", "#C77CFF", "#ff33cc", "#6699CC", "#ff0066", "#ff9900")
names(CSTsPalleteNamed) <- CSTs_all
CSTsPalleteValues <- c("I"="#7CAE00", "II"="#ff0033", "III"="#C77CFF", "IV-A"="#ff33cc", "IV-B"="#6699CC", "IV-C"="#ff0066", "V"="#ff9900")
visitsPallet <- c("lightskyblue4", "red", "lightskyblue")
proInfC <- c("TNF.b" , "IL.12p40" , "IL.12p70" , "IL.1a" , "IL.6" , "TNF.a" , "IL.1b", "IL.18" , "MIF" , "TRAIL")
allCytokineColnames <- c("IL.1a","IL.1b","IL.6","IL.12p40","IL.12p70","IL.18", "MIF","TNF.a","TNF.b","TRAIL","IL.8","IL.16","CTACK","Eotaxin","IP.10","IFN.a2","GROa","MCP.1","MCP.3","MIG","MIP.1a", "MIP.1b","RANTES","IL.3","IL.7", "IL.9","b.NGF","FGF.basic","G.CSF","GM.CSF", "HGF", "LIF","M.CSF", "PDGF.bb", "SCF","SCGF.b", "SDF.1a", "VEGF","IL.2Ra", "IL.2", "IL.4", "IL.5","IL.13","IL.15","IL.17","IFN.g","IL.1ra", "IL.10")
##
Read in data, prepare for analysis and perform sanity checks
#Read in data
#tax table
tax_tab <- data.frame(readRDS("dada2-Chimera-Taxonomy/tax_table_final.RDS"))
tax_tab$Species <- as.character(tax_tab$Species)
## Use precise taxonomy
tax_tab[tax_tab$Species %in% "Lactobacillus_crispatus_Lactobacillus_helveticus", "Species"] <- "Lactobacillus_crispatus"
tax_tab[tax_tab$Species %in% "Lactobacillus_acidophilus", "Species"] <- "Lactobacillus_crispatus"
tax_tab[tax_tab$Species %in% "Lactobacillus_gasseri_Lactobacillus_johnsonii", "Species"] <- "Lactobacillus_gasseri"
tax_tab[tax_tab$Species %in% "Prevotella_timonensis", "Species"] <- "Prevotella_bivia"
tax_tab$Species <- dplyr::case_when(substr(tax_tab$Species,1,2) %in% "d_" ~ tax_tab$Species,
substr(tax_tab$Species,1,2) %in% "p_" ~ tax_tab$Species,
substr(tax_tab$Species,1,2) %in% "c_" ~ tax_tab$Species,
substr(tax_tab$Species,1,2) %in% "f_" ~ tax_tab$Species,
substr(tax_tab$Species,1,2) %in% "o_" ~ tax_tab$Species,
substr(tax_tab$Species,1,2) %in% "g_" ~ tax_tab$Species,
TRUE ~ gsub("_", " ", tax_tab$Species))
tax_tab <- tax_table(as.matrix(tax_tab))
## Import sample data
mapping.data <- readRDS("processed/sample_data.RDS")
rownames(mapping.data) <- mapping.data$SampleID
## Import pecan csts
pecan_csts <- readRDS("metadata/metadata_cst.RDS")
rownames(pecan_csts) <- pecan_csts$SampleID
## Add pecan csts to sample data
mapping.data_ <- inner_join(mapping.data, pecan_csts[, c(1,15:18)], by = "SampleID")
dim(mapping.data_)
## [1] 392 79
rownames(mapping.data_) <- mapping.data_$SampleID
## Import Otu table
otu_tab <- readRDS("dada2-Chimera-Taxonomy/seqtab_final.RDS")
class(otu_tab) <- "numeric"
## Phylogenetic tree
phy <- readRDS("dada2-Phangorn/phangorn.tree.RDS")
## Create phyloseq object
ps00 <- phyloseq(tax_table(tax_tab), otu_table(otu_tab, taxa_are_rows = F), phy_tree(phy$tree))
ps00
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11018 taxa and 399 samples ]
## tax_table() Taxonomy Table: [ 11018 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 11018 tips and 11016 internal nodes ]
## Now prune extra samples not used from sample data
ps0 <- prune_samples(mapping.data$SampleID, ps00)
ps0 <- prune_taxa(taxa_sums(ps0) > 0, ps0)
ps0
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10723 taxa and 392 samples ]
## tax_table() Taxonomy Table: [ 10723 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10723 tips and 10721 internal nodes ]
## Add metadata to phyloseq object
sample_data(ps0) <- mapping.data_
ps0
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10723 taxa and 392 samples ]
## sample_data() Sample Data: [ 392 samples by 79 sample variables ]
## tax_table() Taxonomy Table: [ 10723 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10723 tips and 10721 internal nodes ]
## Perform a few sanity checks. Results should match what is expected.
# sample_variables(ps0) # Display variables from the mapping file
cat("Total number of taxa in the entire dataset\n", ntaxa(ps0))
## Total number of taxa in the entire dataset
## 10723
cat("Total number of samples \n", nsamples(ps0))
## Total number of samples
## 392
# rank_names(ps0) # Taxonomic ranks to confirm proper naming otherwise, correct
# colnames(tax_table(ps0)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Strain")
Taxa without assignment at the Phylumn level are likely spurious and will not be relevant in the downstream analyses so dropping those. In this case we filter out the ’NA’s and ""s.
#Phylum in our data and sample counts at these phylum. Notice <NA>, samples not assigned to any phylum. We remove these
get_taxa_unique(ps0, "Phylum")
## [1] "Firmicutes" "Actinobacteriota" "Bacteroidota"
## [4] "Fusobacteriota" NA "Proteobacteria"
## [7] "Patescibacteria" "Parabasalia" "Campilobacterota"
## [10] "Verrucomicrobiota" "Spirochaetota" "Cyanobacteria"
## [13] "Euglenozoa" "Retaria" "Acidobacteriota"
## [16] "Vertebrata" "Chloroflexi" "Deinococcota"
#Determine taxa without Phylum assignment
cat("Taxa distribution by Phylum")
## Taxa distribution by Phylum
table(tax_table(ps0)[, "Phylum", exclude = NULL])
##
## Acidobacteriota Actinobacteriota Bacteroidota Campilobacterota
## 6 2394 1469 25
## Chloroflexi Cyanobacteria Deinococcota Euglenozoa
## 1 5 1 1
## Firmicutes Fusobacteriota Parabasalia Patescibacteria
## 5302 631 5 52
## Proteobacteria Retaria Spirochaetota Verrucomicrobiota
## 336 1 4 7
## Vertebrata
## 4
ps0.Phylum <- subset_taxa(ps0, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized"))
table(tax_table(ps0.Phylum)[, "Phylum"], exclude = NULL)
##
## Acidobacteriota Actinobacteriota Bacteroidota Campilobacterota
## 6 2394 1469 25
## Chloroflexi Cyanobacteria Deinococcota Euglenozoa
## 1 5 1 1
## Firmicutes Fusobacteriota Parabasalia Patescibacteria
## 5302 631 5 52
## Proteobacteria Retaria Spirochaetota Verrucomicrobiota
## 336 1 4 7
## Vertebrata
## 4
# Remove taxa that is not present in any sample (keep only those assigned at least one sample)
# summary(taxa_sums(ps0.Phylum))
ps0.Phylum <- prune_taxa(taxa_sums(ps0.Phylum) > 0, ps0.Phylum)
cat("\nTaxa Summary after filtering out 0 read count taxa\n")
##
## Taxa Summary after filtering out 0 read count taxa
summary(taxa_sums(ps0.Phylum))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 18 158 3132 2000 301221
# How many Phylum would be present after filtering?
cat("\nNumber of unique pylum remaining after filtering:\n", length(get_taxa_unique(ps0.Phylum, taxonomic.rank = "Phylum")))
##
## Number of unique pylum remaining after filtering:
## 17
Sample preparation and handling can introduce contamination (non-bacterial DNA). We need to check for and remove all this DNA from the remaining samples.
# Some examples of taxa you may not want to include in your analysis
get_taxa_unique(ps0.Phylum, "Kingdom")
## [1] "Bacteria" "Eukaryota"
get_taxa_unique(ps0.Phylum, "Phylum")
## [1] "Firmicutes" "Actinobacteriota" "Bacteroidota"
## [4] "Fusobacteriota" "Proteobacteria" "Patescibacteria"
## [7] "Parabasalia" "Campilobacterota" "Verrucomicrobiota"
## [10] "Spirochaetota" "Cyanobacteria" "Euglenozoa"
## [13] "Retaria" "Acidobacteriota" "Vertebrata"
## [16] "Chloroflexi" "Deinococcota"
ps0.Phylum # Check the number of taxa prior to removal
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10244 taxa and 392 samples ]
## sample_data() Sample Data: [ 392 samples by 79 sample variables ]
## tax_table() Taxonomy Table: [ 10244 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10244 tips and 10242 internal nodes ]
ps0.phylum.bact <- ps0.Phylum %>%
subset_taxa(
Kingdom == "Bacteria" &
Family != "mitochondria" &
Class != "Chloroplast" &
Phylum != "Cyanobacteria"
)
ps0.phylum.bact # Confirm that the taxa were removed
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9548 taxa and 392 samples ]
## sample_data() Sample Data: [ 392 samples by 79 sample variables ]
## tax_table() Taxonomy Table: [ 9548 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 9548 tips and 9546 internal nodes ]
get_taxa_unique(ps0.phylum.bact, "Kingdom")
## [1] "Bacteria"
get_taxa_unique(ps0.phylum.bact, "Class")
## [1] "Bacilli" "Actinobacteria" "Coriobacteriia"
## [4] "Bacteroidia" "Clostridia" "Fusobacteriia"
## [7] "Negativicutes" "Gammaproteobacteria" "Alphaproteobacteria"
## [10] "Campylobacteria" "Chlamydiae" "Spirochaetia"
## [13] "Acidobacteriae" "Thermoleophilia" "Verrucomicrobiae"
## [16] "Chloroflexia" "Deinococci" "Saccharimonadia"
Plot Phyla present along with information about their prevalence (i.e. fraction of samples they are present in) and total abundance accross samples. Dashed horizontal line to show the 5% margin at which we intend to filter.
## Prevalence estimation
# Calculate feature prevalence across the data set. i.e all the samples in which an ASV is found
df.ps0.prevalence <- apply(X = otu_table(ps0.phylum.bact), MARGIN = ifelse(taxa_are_rows(ps0.phylum.bact), yes = 1, no = 2), FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts (all sequences from a sample) to df.prevalence
df.ps0.prevalence <- data.frame(Prevalence = df.ps0.prevalence, TotalAbundance = taxa_sums(ps0.phylum.bact), tax_table(ps0.phylum.bact))
message("Prevalence range (min max): ", min(df.ps0.prevalence$Prevalence), max(df.ps0.prevalence$Prevalence), "\n")
message("Prevalence Summary Stats: ", summary(df.ps0.prevalence$Prevalence), "\n")
#Prevalence plot
df.ps0.prevalence.phylum <- subset(df.ps0.prevalence, Phylum %in% get_taxa_unique(ps0.phylum.bact, "Phylum"))
gg.ps0.prevalence <- ggplot(df.ps0.prevalence.phylum, aes(TotalAbundance, Prevalence / nsamples(ps0.phylum.bact), color=Family)) +
geom_hline(yintercept = 0.01, alpha = 0.8, linetype = 2) +
geom_point(size = 3, alpha = 0.8) +
scale_x_log10() +
xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~Phylum) +
theme(legend.position="none") +
ggtitle("Phylum Prevalence in All Samples\nColored by Family")
gg.ps0.prevalence
#ggplotly(gg.ps1.prevalence)
plyr::ddply(df.ps0.prevalence, "Phylum", function(df1){
data.frame(mean_prevalence=mean(df1$Prevalence),total_abundance=sum(df1$TotalAbundance,na.rm = T),stringsAsFactors = F)
})
## Phylum mean_prevalence total_abundance
## 1 Acidobacteriota 1.166667 238
## 2 Actinobacteriota 4.116330 7839519
## 3 Bacteroidota 2.011461 1939577
## 4 Campilobacterota 1.000000 2749
## 5 Chloroflexi 1.000000 4
## 6 Deinococcota 1.000000 3
## 7 Firmicutes 2.835210 20550942
## 8 Fusobacteriota 2.513471 1454584
## 9 Patescibacteria 1.000000 2
## 10 Proteobacteria 1.428571 143326
## 11 Spirochaetota 1.000000 674
## 12 Verrucomicrobiota 1.285714 1279
The data is already filtered at 5% prevalence. This step is therefore only activated as need arises for filtering.
prevalenceThreshold = 0.01 * nsamples(ps0.phylum.bact)
cat("Prevalence threshold:\n\n")
## Prevalence threshold:
prevalenceThreshold
## [1] 3.92
# Define which taxa fall within the prevalence threshold
keepTaxa <- rownames(df.ps0.prevalence.phylum)[(df.ps0.prevalence.phylum$Prevalence >= prevalenceThreshold)]
cat("\n\nTax meeting prevalence threshold:\n\n")
##
##
## Tax meeting prevalence threshold:
length(keepTaxa)
## [1] 1351
cat("\n\nTax meeting prevalence threshold:\n\n")
##
##
## Tax meeting prevalence threshold:
ntaxa(ps0.phylum.bact)
## [1] 9548
# Remove those taxa
ps1 <- prune_taxa(keepTaxa, ps0.phylum.bact)
ntaxa(ps1)
## [1] 1351
# Calculate feature prevalence across the data set. i.e all the samples in which an ASV is found
df.ps1.prevalence <- apply(X = otu_table(ps1), MARGIN = ifelse(taxa_are_rows(ps1), yes = 1, no = 2), FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts (all sequences from a sample) to df.prevalence
df.ps1.prevalence <- data.frame(Prevalence = df.ps1.prevalence, TotalAbundance = taxa_sums(ps1), tax_table(ps1))
message("Prevalence range (min max): ", min(df.ps1.prevalence$Prevalence), max(df.ps1.prevalence$Prevalence), "\n")
message("Prevalence Summary Stats: ", summary(df.ps1.prevalence$Prevalence), "\n")
#Prevalence plot
df.ps1.prevalence.phylum <- subset(df.ps1.prevalence, Phylum %in% get_taxa_unique(ps1, "Phylum"))
gg.ps1.prevalence <- ggplot(df.ps1.prevalence.phylum, aes(TotalAbundance, Prevalence / nsamples(ps1), color=Family)) +
geom_hline(yintercept = 0.01, alpha = 0.8, linetype = 2) +
geom_point(size = 3, alpha = 0.8) +
scale_x_log10() +
xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~Phylum) +
theme(legend.position="none") +
ggtitle("Phylum Prevalence in All Samples\nColored by Family")
gg.ps1.prevalence
#ggplotly(gg.ps1.prevalence)
plyr::ddply(df.ps1.prevalence, "Phylum", function(df1){
data.frame(mean_prevalence=mean(df1$Prevalence),total_abundance=sum(df1$TotalAbundance,na.rm = T),stringsAsFactors = F)
})
## Phylum mean_prevalence total_abundance
## 1 Actinobacteriota 17.360577 5639209
## 2 Bacteroidota 9.680556 463716
## 3 Firmicutes 13.007194 14976669
## 4 Fusobacteriota 11.756098 738780
## 5 Proteobacteria 8.857143 28778
Create factor variables for downstream analyses
cat("Reorder BV score levels such that negative comes first.\n")
## Reorder BV score levels such that negative comes first.
levels(sample_data(ps1)$bvscat)
## [1] "Negative" "Intermediate" "BV"
sample_data(ps1)$bvscat <- factor(sample_data(ps0.phylum.bact)$bvscat, levels = c("Negative" , "Intermediate", "BV"))
levels(sample_data(ps1)$bvscat)
## [1] "Negative" "Intermediate" "BV"
#Create/convert STD information to factor
sample_data(ps1)$STI <- as.factor(sample_data(ps1)$STI)
sample_data(ps1)$Inflammation <- as.factor(sample_data(ps1)$Inflammation)
sample_data(ps1)$Chlamydia <- as.factor(sample_data(ps1)$Chlamydia)
sample_data(ps1)$Gonorrhoea <- as.factor(sample_data(ps1)$Gonorrhoea)
sample_data(ps1)$Trichomoniasis <- as.factor(sample_data(ps1)$Trichomoniasis)
sample_data(ps1)$Candidiasis <- as.factor(sample_data(ps1)$Candidiasis)
sample_data(ps1)$PSA <- as.factor(sample_data(ps1)$PSA)
sample_data(ps1)$`HSV.1` <- as.factor(sample_data(ps1)$`HSV.1`)
sample_data(ps1)$`HSV.2` <- as.factor(sample_data(ps1)$`HSV.2`)
sample_data(ps1)$sim_CST <- factor(sample_data(ps1)$sim_CST, levels = c("I", "II", "III", "IV-A", "IV-B", "IV-C", "V"))
sample_data(ps1)$sim_subCST <- as.factor(sample_data(ps1)$sim_subCST)
The CAPRISA 083 study screened 267 HIV-uninfected women, and enrolled 101 women, of whom 56 had genital specimen stored accross 3 time points that were used to conduct the longitudinal microbiota and cytokine analysis.
## Create dataset of participants followed accross all three visits
#Extract sample data
sample.data.ps1 <- data.frame(sample_data(ps1))
#Get visit 2 and 3 ids
visit1.ids <- sample.data.ps1 %>% subset(VisitCode == 1000) %>% pull('ParticipantID')
visit1.ids <- visit1.ids[!visit1.ids %in% c(120117, 120016, 120060, 120221, 120229, 120223, 120010)]
visit2.ids <- sample.data.ps1 %>% subset(VisitCode == 1020) %>% pull('ParticipantID')
visit3.ids <- sample.data.ps1 %>% subset(VisitCode == 1030) %>% pull('ParticipantID')
#IDs of followed samples
ids.v2.not.in.v1 <- subset(visit2.ids, !visit2.ids %in% visit1.ids)
ids.v2.not.in.v3 <- subset(visit2.ids, !visit2.ids %in% visit3.ids)
ids.v3.not.in.v1 <- subset(visit3.ids, !visit3.ids %in% visit1.ids)
ids.v3.not.in.v2 <- subset(visit3.ids, !visit3.ids %in% visit2.ids)
#All missing visit 1 from visit 2 and 3
ids.missing.visit1 <- c(ids.v2.not.in.v1, ids.v3.not.in.v1)
all.missing <- unique(c(ids.missing.visit1, ids.v3.not.in.v2, ids.v2.not.in.v3))
ids.followed.v2v3 <- unique(c(visit2.ids, visit3.ids))
#visit 1, 2 and 3 intersections
v1v2.ids <- unique(subset(sample.data.ps1, ParticipantID %in% visit1.ids & ParticipantID %in% visit2.ids) %>%
pull('ParticipantID'))
v1v3.ids <- unique(subset(sample.data.ps1, ParticipantID %in% visit1.ids & ParticipantID %in% visit3.ids) %>%
pull('ParticipantID'))
v2v3.ids <- unique(subset(sample.data.ps1, ParticipantID %in% visit2.ids & ParticipantID %in% visit3.ids) %>%
pull('ParticipantID'))
##Get dataset with followed samples
ps2.v1v2v3 <- subset_samples(ps1, (sample_data(ps1)$ParticipantID %in% ids.followed.v2v3 &
!sample_data(ps1)$ParticipantID %in% all.missing))
ps2.v1v2v3
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1351 taxa and 168 samples ]
## sample_data() Sample Data: [ 168 samples by 79 sample variables ]
## tax_table() Taxonomy Table: [ 1351 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1351 tips and 1349 internal nodes ]
#Samples followed through the 3 times
ps2.v1v2v3.ids <- unique(sample_data(ps2.v1v2v3)$ParticipantID)
saveRDS(ps2.v1v2v3.ids, "results/v1v2v3-ids.txt")
sample_data(ps2.v1v2v3)$VisitCode <- factor(sample_data(ps2.v1v2v3)$VisitCode, levels = c(1000, 1020, 1030))
#IDs present in visit 1, 2 and 3
ids.followed.v1v2v3 <- unique(sample_data(ps2.v1v2v3)$ParticipantID)
ids.followed.v1v2v3 <- c(ids.followed.v1v2v3) #Plus non bv ids
#Draw venn diagram of visit distribution
draw.triple.venn(area1 = length(visit1.ids), area2 = length(visit2.ids), area3 = length(visit3.ids), n12 = length(v1v2.ids), n23 = length(v2v3.ids), n13 =length(v1v3.ids), n123 = length(ids.followed.v1v2v3), category = c("Visit1", "Visit2", "Visit3"), lty = "blank", fill = c("skyblue", "pink1", "mediumorchid"), cex=2, cat.cex = 2, cat.fontfamily = rep("serif", 3))
## (polygon[GRID.polygon.910], polygon[GRID.polygon.911], polygon[GRID.polygon.912], polygon[GRID.polygon.913], polygon[GRID.polygon.914], polygon[GRID.polygon.915], text[GRID.text.916], text[GRID.text.917], text[GRID.text.918], text[GRID.text.919], text[GRID.text.920], text[GRID.text.921], text[GRID.text.922], text[GRID.text.923], text[GRID.text.924], text[GRID.text.925])
sample_data(ps2.v1v2v3)$bv.persisted <- 0
sample_data(ps2.v1v2v3)$bv.recurred <- 0
sample_data(ps2.v1v2v3)$bv.cleared <- 0
ps2.v1v2v3
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1351 taxa and 168 samples ]
## sample_data() Sample Data: [ 168 samples by 82 sample variables ]
## tax_table() Taxonomy Table: [ 1351 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1351 tips and 1349 internal nodes ]
## Check metadata structure for conformity
#str(sample_data(ps2.v1v2v3))
## Check variables names
## cat("Sample data variable names:\n\n")
## colnames(sample_data(ps2.v1v2v3))
## What is the distribution of bv accross visits?
cat("\n\nDistribution of bv accross visits:\n\n")
##
##
## Distribution of bv accross visits:
table(sample_data(ps2.v1v2v3)$bvscat, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## Negative 0 17 13
## Intermediate 22 19 26
## BV 34 20 17
## What is the distribution of csts accross visits
cat("\n\nDistribution of CSTs accross visits:\n\n")
##
##
## Distribution of CSTs accross visits:
table(sample_data(ps2.v1v2v3)$sim_CST, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## I 1 2 3
## III 15 22 26
## IV-A 17 11 7
## IV-B 23 20 20
## IV-C 0 1 0
## What is the distribution of STIs accross visits
cat("\n\nDistribution of STIs accross visits:\n\n")
##
##
## Distribution of STIs accross visits:
table(sample_data(ps2.v1v2v3)$STI, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## 0 25 44 42
## 1 31 12 14
## What is the distribution of inflammation accross visits
cat("\n\nDistribution of inflammation accross visits:\n\n")
##
##
## Distribution of inflammation accross visits:
table(sample_data(ps2.v1v2v3)$Inflammation, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## 0 44 46 47
## 1 12 10 9
### Load visit 1 analysis specific packages
# Set up required packages
.cran_packages <- c("ggpubr", "mclust", "dunn.test", "cluster")
.bioc_packages <- c()
.inst <- .cran_packages %in% installed.packages()
if(any(!.inst)) {
install.packages(.cran_packages[!.inst])
}
.inst <- .bioc_packages %in% installed.packages()
if(any(!.inst)) {
source("http://bioconductor.org/biocLite.R")
biocLite(.bioc_packages[!.inst], ask = F)
}
# Load packages into session, and print package version
sapply(c(.cran_packages, .bioc_packages), require, character.only = TRUE)
## ggpubr mclust dunn.test cluster
## TRUE TRUE TRUE TRUE
## Sequencing depth summary
dt.ps2.v1v2v3.sample_data = cbind(as(sample_data(ps2.v1v2v3), "data.frame"),
TotalReads = sample_sums(ps2.v1v2v3)) ##Total reads is sequence depth
gg.SeqDepth = ggplot(dt.ps2.v1v2v3.sample_data, aes(TotalReads)) + geom_histogram() + ggtitle("Sequencing Depth")
ggplotly(gg.SeqDepth)
The graph shows how many reads each sample has. The Y-axis shows the number samples and the X-axis shows the total number of reads for those samples
How are the participants distributed accross visits?
## Distribution of BV accross visits
cat("BV status")
## BV status
table(sample_data(ps2.v1v2v3)$bvscat, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## Negative 0 17 13
## Intermediate 22 19 26
## BV 34 20 17
cat("\nInflammation")
##
## Inflammation
table(sample_data(ps2.v1v2v3)$Inflammation, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## 0 44 46 47
## 1 12 10 9
cat("\nInflammation vs BV")
##
## Inflammation vs BV
table(sample_data(ps2.v1v2v3)$Inflammation, sample_data(ps2.v1v2v3)$bvscat)
##
## Negative Intermediate BV
## 0 26 55 56
## 1 4 12 15
cat("\nSTI")
##
## STI
table(sample_data(ps2.v1v2v3)$STI, sample_data(ps2.v1v2v3)$VisitCode)
##
## 1000 1020 1030
## 0 25 44 42
## 1 31 12 14
## Estimate richness
ps2.v1v2v3.rich <- cestimate_richness(ps2.v1v2v3)
## Aglomerate to species
ps2.v1v2v3.glom.species <- tax_glom(ps2.v1v2v3.rich, "Species")
## Transform to relative abundance
ps2.v1v2v3.glom.species.ra <- transform_sample_counts(ps2.v1v2v3.glom.species, function(x) {
x/sum(x)
})
otu_table(ps2.v1v2v3.glom.species.ra)[is.nan(otu_table(ps2.v1v2v3.glom.species.ra))] <- 0
## Distribution of samples accross CSTS
levels(sample_data(ps2.v1v2v3.glom.species.ra)$sim_CST)
## [1] "I" "III" "IV-A" "IV-B" "IV-C"
table(sample_data(ps2.v1v2v3.glom.species.ra)$sim_CST)
##
## I III IV-A IV-B IV-C
## 6 63 35 63 1
table(sample_data(ps2.v1v2v3.glom.species.ra)$sim_CST, sample_data(ps2.v1v2v3.glom.species.ra)$VisitCode)
##
## 1000 1020 1030
## I 1 2 3
## III 15 22 26
## IV-A 17 11 7
## IV-B 23 20 20
## IV-C 0 1 0
table(sample_data(ps2.v1v2v3.glom.species.ra)$sim_CST, sample_data(ps2.v1v2v3.glom.species.ra)$bvscat)
##
## Negative Intermediate BV
## I 4 2 0
## III 24 26 13
## IV-A 0 9 26
## IV-B 2 30 31
## IV-C 0 0 1
#Sort by CTS snd Shannon indexes before proceeding
sample_data(ps2.v1v2v3.glom.species.ra) <- sample_data(ps2.v1v2v3.glom.species.ra)[order(sample_data(ps2.v1v2v3.glom.species.ra)$sim_CST, sample_data(ps2.v1v2v3.glom.species.ra)$ShannonLgT), ]
##
## 1000 1020 1030
## Negative 0 17 13
## Intermediate 22 19 26
## BV 34 20 17
## numeric(0)
sd.ps2.v1v2v3.glom.species.ra <- data.frame(sample_data(ps2.v1v2v3.glom.species.ra))
ggpaired(sd.ps2.v1v2v3.glom.species.ra, x = "VisitCode", y = "ShannonLgT",
color = "VisitCode", line.color = "gray", line.size = 0.4,
palette = visitsPallet, id = "ParticipantID", width = 0.1 ) +
scale_x_discrete(breaks=c(1000, 1020, 1030), labels=c("Baseline", "Visit 2", "Visit 3")) +
scale_color_discrete(name = "Clinical Visits", labels = c("Baseline", "Visit 2", "Visit 3")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme_light() +
ylab("Shannon Diversity") +
xlab("Clinical Visits") +
stat_compare_means(paired = TRUE)
## Change in Shannon diversity accross visits
my_comparisons <- list( c("1000", "1020"), c("1020", "1030"))
ggpaired(sd.ps2.v1v2v3.glom.species.ra, x = "VisitCode", y = "ShannonLgT",
color = "VisitCode", line.color = "gray", line.size = 0.4,
palette = visitsPallet, id = "ParticipantID", width = 0.1 ) +
scale_x_discrete(breaks=c(1000, 1020, 1030), labels=c("Baseline", "Visit 2", "Visit 3")) +
scale_color_discrete(name = "Clinical Visits", labels = c("Baseline", "Visit 2", "Visit 3")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme_light() +
ylab("Shannon Diversity") +
xlab("Clinical Visits") +
stat_compare_means(paired = TRUE, comparisons = my_comparisons)
PS: Legends for the annotations will be imposed in the final graphics using a different program. However, for BV, No BV=white, intermediate=grey and bv positive=maroon. For the rest of the annotations, color=positive.
##
## Shapiro-Wilk normality test
##
## data: sd.ps2.v1v2v3.glom.species.ra$ShannonLgT
## W = 0.86853, p-value = 0.00000000005878
##
## Kruskal-Wallis rank sum test
##
## data: sd.ps2.v1v2v3.glom.species.ra$ShannonLgT and as.factor(sd.ps2.v1v2v3.glom.species.ra$bvscat)
## Kruskal-Wallis chi-squared = 9.3617, df = 2, p-value = 0.009271
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 9.3617, df = 2, p-value = 0.01
##
##
## Comparison of x by group
## (No adjustment)
## Col Mean-|
## Row Mean | BV Intermed
## ---------+----------------------
## Intermed | 1.002800
## | 0.1580
## |
## Negative | 3.054965 2.250734
## | 0.0011* 0.0122*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
##
## Kruskal-Wallis rank sum test
##
## data: sd.ps2.v1v2v3.glom.species.ra$ShannonLgT and sd.ps2.v1v2v3.glom.species.ra$sim_CST
## Kruskal-Wallis chi-squared = 29.005, df = 4, p-value = 0.000007798
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 29.0052, df = 4, p-value = 0
##
##
## Comparison of x by group
## (No adjustment)
## Col Mean-|
## Row Mean | I III IV-A IV-B
## ---------+--------------------------------------------
## III | -0.701540
## | 0.2415
## |
## IV-A | -2.434721 -3.681219
## | 0.0075* 0.0001*
## |
## IV-B | -2.543799 -4.417582 -0.052319
## | 0.0055* 0.0000* 0.4791
## |
## IV-C | 0.618589 0.960292 1.719560 1.741218
## | 0.2681 0.1685 0.0428 0.0408
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
##
## Wilcoxon rank sum test with continuity correction
##
## data: ShannonLgT by STI
## W = 3157, p-value = 0.984
## alternative hypothesis: true location shift is not equal to 0
##
## Wilcoxon rank sum test with continuity correction
##
## data: ShannonLgT by Inflammation
## W = 2119, p-value = 0.987
## alternative hypothesis: true location shift is not equal to 0
##
## Wilcoxon rank sum test with continuity correction
##
## data: ShannonLgT by PSA
## W = 2309, p-value = 0.6626
## alternative hypothesis: true location shift is not equal to 0
##
## Call: glm(formula = ShannonLgT ~ bvscat * Inflammation, data = sd.ps2.v1v2v3.glom.species.ra)
##
## Coefficients:
## (Intercept) bvscatIntermediate
## 0.55241 0.02717
## bvscatBV Inflammation1
## 0.05246 -0.05062
## bvscatIntermediate:Inflammation1 bvscatBV:Inflammation1
## 0.05141 0.01827
##
## Degrees of Freedom: 167 Total (i.e. Null); 162 Residual
## Null Deviance: 2.581
## Residual Deviance: 2.502 AIC: -216
Do we see any effect of sequencing run that can confound our analysis?
evals <- ord.ps2.v1v2v3.glom.species.ra.pcoa.bray$values$Eigenvalues
p.pcoa.bray <- plot_ordination(ps2.v1v2v3.glom.species.ra, ord.ps2.v1v2v3.glom.species.ra.pcoa.bray, color = "SeqRun", axes = c(1,2)) +
geom_point(size = 2) +
scale_fill_manual(values = CSTsPallete) +
labs(title = "PCoA of Bray Curtis (Abundance)", color = "SeqRuns") +
theme_minimal() +
coord_fixed(sqrt(evals[2] / evals[1])) #+
#stat_ellipse(type = "t")
p.pcoa.bray
## Cross tabulation of CTs and BV Score
##
## Negative Intermediate BV
## I 4 2 0
## III 24 26 13
## IV-A 0 9 26
## IV-B 2 30 31
## IV-C 0 0 1
## Cross tabulation of CTs and Inflamation
##
## 0 1
## I 5 1
## III 51 12
## IV-A 31 4
## IV-B 49 14
## IV-C 1 0
In this section we focus only on participants with all 3 visits. We assess the microbiome dynamics accross visits to establish treatment effects on the microbiome profiles of young women in the study.
Ordinations
What is the normal characterization of bacterial flora in women within this community that do not have BV?
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 62 taxa and 51 samples ]
## sample_data() Sample Data: [ 51 samples by 83 sample variables ]
## tax_table() Taxonomy Table: [ 62 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 62 tips and 61 internal nodes ]
## [1] "I" "III" "IV-A" "IV-B"
## NULL
Visit 1 biplots
Visit three heatmaps and bar plots
Visit 3 PCA biplots
Here we consider the relative abundance of bacteria among the participants who were bv intermediate or positive, had cleared in visit2 and tested positive or intermediate in visit 3
### BV treated - cleared - recurred (at individual level)
ps2.v1.alluv <- subset_samples(ps2.v1v2v3.glom.species.ra, VisitCode == 1000)
ps2.v1.alluv_sd <- data.frame(sample_data(ps2.v1.alluv)[order(sample_data(ps2.v1.alluv)$SampleID, sample_data(ps2.v1.alluv)$VisitCode),])
ps2.v2.alluv <- subset_samples(ps2.v1v2v3.glom.species.ra, VisitCode == 1020)
ps2.v2.alluv_sd <- data.frame(sample_data(ps2.v2.alluv)[order(sample_data(ps2.v2.alluv)$SampleID, sample_data(ps2.v2.alluv)$VisitCode),])
ps2.v3.alluv <- subset_samples(ps2.v1v2v3.glom.species.ra, VisitCode == 1030)
ps2.v3.alluv_sd <- data.frame(sample_data(ps2.v3.alluv)[order(sample_data(ps2.v3.alluv)$SampleID, sample_data(ps2.v3.alluv)$VisitCode),])
alluvialData1 <- inner_join(ps2.v1.alluv_sd[, c("ParticipantID", "sim_CST")], ps2.v2.alluv_sd[, c("ParticipantID", "sim_CST")], by = "ParticipantID")
alluvialData1 <- inner_join(alluvialData1, ps2.v3.alluv_sd[, c("ParticipantID", "sim_CST")], by = "ParticipantID")
alluvialData1 <- alluvialData1[, (2:4)]
colnames(alluvialData1) <- c("Visit1", "Visit2", "Visit3")
alluvialData1 <- alluvialData1 %>% dplyr::group_by(Visit1, Visit2, Visit3) %>% dplyr::summarise(n = n())
alluvial(
alluvialData1[, 1:3],
freq=alluvialData1$n,
col = ifelse( alluvialData1$Visit1 == "I", CSTsPallete[1],
ifelse( alluvialData1$Visit1 == "II", CSTsPallete[2],
ifelse( alluvialData1$Visit1 == "III", CSTsPallete[3],
ifelse( alluvialData1$Visit1 == "IV-A", CSTsPallete[4],
ifelse( alluvialData1$Visit1 == "IV-B", CSTsPallete[5],
ifelse( alluvialData1$Visit1 == "IV-C", CSTsPallete[6],
ifelse( alluvialData1$Visit1 == "V", CSTsPallete[7], ""))))))),
border = c("white"),
alpha = 0.8,
blocks=FALSE,
axis_labels = c("Baseline", "6 weeks", "3 months")
)
## I III IV.A IV.B IV.C
## Visit1 1 15 17 23 0
## Visit2 2 22 11 20 1
## Visit3 3 26 7 20 0
## Visit1 Visit2 Visit3
## I 0.01785714 0.03571429 0.05357143
## III 0.26785714 0.39285714 0.46428571
## IV.A 0.30357143 0.19642857 0.12500000
## IV.B 0.41071429 0.35714286 0.35714286
## IV.C 0.00000000 0.01785714 0.00000000
library(corrplot)
profInfC <- c("TNF.b" , "IL.12p40" , "IL.12p70" , "IL.1a" , "IL.6" , "TNF.a" , "IL.1b", "IL.18" , "MIF" , "TRAIL")
#View(data.frame(sample_data(ps2.v1v2v3.cst.glom.Species.ra)))
ps2.v1.cyto <- prune_samples(sample_data(ps2.v1v2v3.glom.species.ra)$VisitCode == 1000, ps2.v1v2v3.glom.species.ra)
ps2.v1.cyto <- prune_taxa(taxa_sums(ps2.v1.cyto) > 0, ps2.v1.cyto)
ps2.v2.cyto <- prune_samples(sample_data(ps2.v1v2v3.glom.species.ra)$VisitCode == 1020, ps2.v1v2v3.glom.species.ra)
ps2.v2.cyto <- prune_taxa(taxa_sums(ps2.v2.cyto) > 0, ps2.v2.cyto)
visit1cyto <- subset(data.frame(sample_data(ps2.v1v2v3.glom.species.ra)), VisitCode == 1000, select = c("TNF.b" , "IL.12p40" , "IL.12p70" , "IL.1a" , "IL.6" , "TNF.a" , "IL.1b", "IL.18" , "MIF" , "TRAIL"))
visit2cyto <- subset(data.frame(sample_data(ps2.v1v2v3.glom.species.ra)), VisitCode == 1020, select = c("TNF.b" , "IL.12p40" , "IL.12p70" , "IL.1a" , "IL.6" , "TNF.a" , "IL.1b", "IL.18" , "MIF" , "TRAIL"))
visit1cyto$PID <- as.numeric(substr(rownames(visit1cyto), 1, nchar(rownames(visit1cyto))-1))
visit2cyto$PID <- as.numeric(substr(rownames(visit2cyto), 1, nchar(rownames(visit2cyto))-1))
idsInBoth <- intersect(visit1cyto$PID, visit2cyto$PID)
visit1cyto <- visit1cyto[visit1cyto$PID %in% idsInBoth, ]
visit2cyto <- visit2cyto[visit2cyto$PID %in% idsInBoth, ]
## order by rownames of v2c
visit1cyto <- visit1cyto[with(visit2cyto, order(rownames(visit1cyto))), ]
visit12FC <- (visit2cyto - visit1cyto[with(visit2cyto, order(rownames(visit1cyto))), ])
visit12FC$PID <- NULL ##drop PID column
##V1 samples
V1_otu_tab <- data.frame(otu_table(ps2.v1.cyto))
V1_otu_tab1 <- as.data.frame(t(V1_otu_tab))
V1_otu_tab1$OTU <- rownames(V1_otu_tab1)
V1_tax_tab <- data.frame(tax_table(ps2.v1.cyto))
V1_tax_tab$OTU <- rownames(V1_tax_tab)
otuWSpeciesV1 <- merge(V1_otu_tab1, V1_tax_tab[, c("OTU", "Species")], by = "OTU")
otuWSpeciesV1 <- otuWSpeciesV1[, -1]
otuWSpeciesV1 <- otuWSpeciesV1[!duplicated(otuWSpeciesV1$Species),]
rownames(otuWSpeciesV1) <- otuWSpeciesV1$Species
t.otuWSpeciesV1 <- data.frame(t(otuWSpeciesV1[, -ncol(otuWSpeciesV1)]))
##V2 samples
V2_otu_tab <- data.frame(otu_table(ps2.v2.cyto))
V2_otu_tab1 <- as.data.frame(t(V2_otu_tab))
V2_otu_tab1$OTU <- rownames(V2_otu_tab1)
V2_tax_tab <- data.frame(tax_table(ps2.v2.cyto))
V2_tax_tab$OTU <- rownames(V2_tax_tab)
otuWSpeciesV2 <- merge(V2_otu_tab1, V1_tax_tab[, c("OTU", "Species")], by = "OTU")
otuWSpeciesV2 <- otuWSpeciesV2[, -1]
otuWSpeciesV2 <- otuWSpeciesV2[!duplicated(otuWSpeciesV2$Species),]
rownames(otuWSpeciesV2) <- otuWSpeciesV2$Species
t.otuWSpeciesV2 <- data.frame(t(otuWSpeciesV2[, -ncol(otuWSpeciesV2)]))
## resolve ids
t.otuWSpeciesV1$PID <- as.numeric(substr(rownames(t.otuWSpeciesV1), 1, nchar(rownames(t.otuWSpeciesV1))-1))
t.otuWSpeciesV2$PID <- as.numeric(substr(rownames(t.otuWSpeciesV2), 1, nchar(rownames(t.otuWSpeciesV2))-1))
## Taxa in both
taxaInBoth <- intersect(colnames(t.otuWSpeciesV1), colnames(t.otuWSpeciesV2))
t.otuWSpeciesV1 <- t.otuWSpeciesV1[t.otuWSpeciesV1$PID %in% idsInBoth, taxaInBoth]
t.otuWSpeciesV2 <- t.otuWSpeciesV2[t.otuWSpeciesV2$PID %in% idsInBoth, taxaInBoth]
##Fold Change
otuWSpeciesFC <- (t.otuWSpeciesV2 - t.otuWSpeciesV1[with(t.otuWSpeciesV2, order(rownames(t.otuWSpeciesV1))), ])
otuWSpeciesFC$PID <- NULL ##drop PID column
cyto.spec <- cbind(otuWSpeciesFC, visit12FC)
cyto.spec.pca <- PCA(scale(as.matrix(cyto.spec), center = TRUE, scale = TRUE), scale.unit = FALSE, graph = FALSE)
corrplot::corrplot(cor(otuWSpeciesFC , visit12FC), type="full",
p.mat = cyto.spec.pca$P, sig.level = 0.001, insig = "blank")
#par(cex = cex.before)
v2sd <- data.frame(sample_data(ps2.v2.cyto))
ps2.v2.cyto.ha <- subset_samples(ps2.v2.cyto, get_variable(ps2.v2.cyto, "ParticipantID") %in% idsInBoth)
fviz_pca_biplot(cyto.spec.pca,
#individuals
geom.ind = "point", # show points only (but not "text"),
habillage = (data.frame(sample_data(ps2.v2.cyto.ha)))$sim_CST,
col.var = "black",
#variables
alpha.var = "cos2",
palette = CSTsPallete,
legend.title = "sim_CST",
title = "PCA plot Showing the Relationship Between Bacteria and Proinflammatory Cytokines",
repel = TRUE)+
scale_color_brewer(palette="Dark2")+
theme_minimal(base_size = 14) +
theme(text = element_text(face = "bold"))
## Visit 2 (treatment)
ps2.v2.cst.glom.species.ra <- subset_samples(ps2.v1v2v3.glom.species.ra, get_variable(ps2.v1v2v3.glom.species.ra, "VisitCode") == 1020)
ps2.v2.cst.glom.species.ra <- prune_taxa(taxa_sums(ps2.v2.cst.glom.species.ra) >0, ps2.v2.cst.glom.species.ra)
ps2.v2.cst.glom.species.ra
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 25 taxa and 56 samples ]
## sample_data() Sample Data: [ 56 samples by 87 sample variables ]
## tax_table() Taxonomy Table: [ 25 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 25 tips and 24 internal nodes ]
keyCytokines <- c("G.CSF", "IL.1a", "IL.1b", "M.CSF")
ps2.v2.cyto <- subset_taxa(ps2.v2.cst.glom.species.ra, Species %in% keyTaxa)
taxa_names(ps2.v2.cyto) <- tax_table(ps2.v2.cyto)[, "Species"]
otu.ps2.v2.cyto <- data.frame(otu_table(ps2.v2.cyto))
df.ps2.v2.cyto <- data.frame(sample_data(ps2.v2.cyto)[, c(keyCytokines)])
dt.ps2.v2.cyto.otu <- cbind(otu.ps2.v2.cyto, df.ps2.v2.cyto)
dt.ps2.v2.pca <- PCA(scale(as.matrix(dt.ps2.v2.cyto.otu), center = TRUE, scale = TRUE), scale.unit = FALSE, graph = FALSE)
fviz_pca_biplot(dt.ps2.v2.pca,
#individuals
geom.ind = "point", # show points only (but not "text"),
habillage = (data.frame(sample_data(ps2.v2.cyto)))$sim_CST,
col.var = "black",
#variables
alpha.var = "cos2",
palette = CSTsPallete,
legend.title = "sim_CST",
title = "PCA plot Showing the Relationship Between Bacteria and Proinflammatory Cytokines",
repel = TRUE)+
scale_color_brewer(palette="Dark2")+
theme_minimal(base_size = 14) +
theme(text = element_text(face = "bold"))
### Correlation analyses:
library("Hmisc")
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
res2<-rcorr(as.matrix(dt.ps2.v2.cyto.otu))
## flattenCorrMatrix(res2$r, res2$P)
Only Cleared
##
## 0 1
## 43 13
##
## 0 1
## 29 27
##
## 0 1
## 40 16
Only persisted
Only recurred
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1276 taxa and 168 samples ]
## sample_data() Sample Data: [ 168 samples by 82 sample variables ]
## tax_table() Taxonomy Table: [ 1276 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1276 tips and 1274 internal nodes ]
## [1] "Intercept" "VisitCode_1020_vs_1000" "VisitCode_1030_vs_1000"
##
## out of 63 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 21, 33%
## LFC < 0 (down) : 27, 43%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 63
## [1] 45
## [1] "Row.names" "baseMean" "log2FoldChange" "lfcSE"
## [5] "stat" "pvalue" "padj" "Kingdom"
## [9] "Phylum" "Class" "Order" "Family"
## [13] "Genus" "Species"
##
## out of 63 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 14, 22%
## LFC < 0 (down) : 34, 54%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 63
## [1] 45
## [1] "Row.names" "baseMean" "log2FoldChange" "lfcSE"
## [5] "stat" "pvalue" "padj" "Kingdom"
## [9] "Phylum" "Class" "Order" "Family"
## [13] "Genus" "Species"
Cytokine Heatmaps - All visits
significant_cytokines <- c("PC1", "IL.1a" ,"IL.1b", "IL.6","IL.12p40","IL.12p70","IL.18","MIF", "TNF.a", "TNF.b","TRAIL", "MIP.1b", "CTACK", "GROa", "IP.10", "MIG", "RANTES", "LIF", "SCF" , "SCGF.b" , "IL.5")
df.all.cyto.melis.scale.sub <- df.all.cyto.melis.scale[, significant_cytokines]
df.all.cyto.melis.scale.sub.t <- t(df.all.cyto.melis.scale.sub)
#[order(rownames(df.all.cyto.melis.scale.t.sub)[significant_cytokines]),]
## Plot cytokine heatmaps of all Cytokines
plotHeatmaps(df.all.cyto.melis.scale.sub.t, df.all.sampledata.melis, myAnnotColmns)
###Print them all out
myAnnotColmnsb <- c( "sim_subCST")
plotHeatmaps_b <- function(dataFr, dataFr2, annotColmnsb){
require(gplots)
for(i in c(1:length(annotColmnsb))){
colName <- annotColmnsb[i]
#Define color vector
colVec <- as.vector(dataFr2[, colName])
colVec <- replace(colVec, which(colVec == "I-A"), CSTsPallete[1])
colVec <- replace(colVec, which(colVec == "I-B"), CSTsPallete[2])
colVec <- replace(colVec, which(colVec == "III-A"),CSTsPallete[5])
colVec <- replace(colVec, which(colVec == "III-B"),CSTsPallete[6])
colVec <- replace(colVec, which(colVec == "IV-A"), CSTsPallete[7])
colVec <- replace(colVec, which(colVec == "IV-B"), CSTsPallete[8])
colVec <- replace(colVec, which(colVec == "IV-C0"),CSTsPallete[9])
#Generate heatmaps
heatmap.2(dataFr,
main = paste0("Heatmap with ", colName, " annotation"),
hclustfun = hclust2,
distfun=dist2,
trace="none", # turns off trace lines inside the heat map
col=my_palette, # use on color palette defined earlier
breaks=col_breaks, # enable color transition at specified limits
sepcolor="gray70", # determines the separation color
colsep=0:ncol(dataFr), # determines where column separators go
rowsep=0:nrow(dataFr), # determines where row separators go
sepwidth=c(0.01,0.01), # determines the width of the separators
density.info="none", # turns off density plot inside color legend
keysize = 1.5,
labCol = FALSE, #remove column names
ColSideColors=colVec,
key = T
)
}
}
## Plot cytokine heatmaps of all Cytokines
plotHeatmaps_b(df.all.cyto.melis.scale.sub.t, df.all.sampledata.melis, myAnnotColmnsb)
###Print them all out
myAnnotColmnsc <- c("bvscat")
plotHeatmaps_c <- function(dataFr, dataFr2, annotColmnsc){
require(gplots)
for(i in c(1:length(annotColmnsc))){
colName <- annotColmnsc[i]
#Define color vector
colVec <- as.vector(dataFr2[, colName])
colVec <- replace(colVec, which(colVec == "Negative"),"white")
colVec <- replace(colVec, which(colVec == "Intermediate"),"grey60")
colVec <- replace(colVec, which(colVec == "BV"),"maroon")
#Generate heatmaps
heatmap.2(dataFr,
main = paste0("Heatmap with ", colName, " annotation"),
hclustfun = hclust2,
distfun=dist2,
trace="none", # turns off trace lines inside the heat map
col=my_palette, # use on color palette defined earlier
breaks=col_breaks, # enable color transition at specified limits
sepcolor="gray70", # determines the separation color
colsep=0:ncol(dataFr), # determines where column separators go
rowsep=0:nrow(dataFr), # determines where row separators go
sepwidth=c(0.01,0.01), # determines the width of the separators
density.info="none", # turns off density plot inside color legend
keysize = 1.5,
labCol = FALSE, #remove column names
ColSideColors=colVec,
key = T
)
}
}
## Plot cytokine heatmaps of all Cytokines
plotHeatmaps_c(df.all.cyto.melis.scale.sub.t, df.all.sampledata.melis, myAnnotColmnsc)
Here we assess the likelihoods of transitioning between CSTs between visits. i.e. is the introduction of medication likely to cause a transition in CST? if yes, what are the most probable transitions?
## Warning: package 'igraph' was built under R version 3.5.2
## Warning: package 'markovchain' was built under R version 3.5.2
## igraph markovchain dunn.test
## TRUE TRUE TRUE
Here we assess the effect of treatment on the CSTs status of participants between visit 1 and visit 2. Does treatment have an effect on the transtion between CSTs? and more importantly will there be significant transtion towards L. Crispatus? which is the desired state for a healthy vagina.
##
## Visit 1 - BV/CST Distribution
##
## I III IV-A IV-B
## Intermediate 1 7 7 7
## BV 0 8 10 16
##
## Visit 2 - BV/CST Distribution
##
## I III IV-A IV-B IV-C
## Negative 1 14 0 2 0
## Intermediate 1 6 1 11 0
## BV 0 2 10 7 1
##
## Visit 3 - BV/CST Distribution
##
## I III IV-A IV-B
## Negative 3 10 0 0
## Intermediate 0 13 1 12
## BV 0 3 6 8
## Marked increase L.Crispatus after treatment and reduction in diversity
##
## I III IV-A IV-B
## I 0 1 0 0
## III 3 6 1 5
## IV-A 0 6 4 7
## IV-B 0 13 2 8
## Table showing transitions accross visits
| dt.ps2.v1v2.netM$CurrCST | ||||
|---|---|---|---|---|
| I | III | IV-A | IV-B | |
| dt.ps2.v1v2.netM$PrevCST | ||||
| I | 1 | |||
| III | 3 | 6 | 1 | 5 |
| IV-A | 6 | 4 | 7 | |
| IV-B | 13 | 2 | 8 | |
| #Total cases | 3 | 26 | 7 | 20 |
## [,1] [,2] [,3] [,4]
## I-A 0.0 0.0000000 0.00000000 0.0000000
## I-B 0.0 0.0000000 0.00000000 0.0000000
## III-A 0.0 1.0000000 0.00000000 0.0000000
## III-B 0.2 0.4000000 0.06666667 0.3333333
## IV-A 0.0 0.3529412 0.23529412 0.4117647
## IV-B 0.0 0.5652174 0.08695652 0.3478261
## [1] 0.0000000 0.0000000 0.0000000 0.9102392
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] markovchain_0.6.9.14 igraph_1.2.4.1
## [3] gplots_3.0.1.1 EnhancedVolcano_1.0.1
## [5] Hmisc_4.2-0 Formula_1.2-3
## [7] survival_2.44-1.1 corrplot_0.84
## [9] alluvial_0.1-2 plyr_1.8.4
## [11] ggsn_0.5.0 factoextra_1.0.5
## [13] FactoMineR_1.41 ggrepel_0.8.1
## [15] cluster_2.0.9 dunn.test_1.3.5
## [17] mclust_5.4.3 ggpubr_0.2
## [19] magrittr_1.5 biomformat_1.10.1
## [21] ape_5.3 reshape2_1.4.3
## [23] scales_1.0.0 DESeq2_1.22.2
## [25] SummarizedExperiment_1.12.0 DelayedArray_0.8.0
## [27] BiocParallel_1.16.6 matrixStats_0.54.0
## [29] Biobase_2.42.0 GenomicRanges_1.34.0
## [31] GenomeInfoDb_1.18.2 IRanges_2.16.0
## [33] S4Vectors_0.20.1 BiocGenerics_0.28.0
## [35] phyloseq_1.26.1 dada2_1.10.1
## [37] Rcpp_1.0.3 VennDiagram_1.6.20
## [39] futile.logger_1.4.3 stringi_1.4.3
## [41] expss_0.8.11 plotly_4.9.0
## [43] RColorBrewer_1.1-2 data.table_1.12.2
## [45] vegan_2.5-5 lattice_0.20-38
## [47] permute_0.9-5 forcats_0.4.0
## [49] stringr_1.4.0 dplyr_0.8.3
## [51] purrr_0.3.3 readr_1.3.1
## [53] tidyr_1.0.0 tibble_2.1.3
## [55] ggplot2_3.2.1 tidyverse_1.2.1
## [57] gridExtra_2.3 knitr_1.26
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 RSQLite_2.1.1 AnnotationDbi_1.44.0
## [4] htmlwidgets_1.5.1 maptools_0.9-5 munsell_0.5.0
## [7] codetools_0.2-16 units_0.6-2 withr_2.1.2
## [10] colorspace_1.4-1 rstudioapi_0.10 leaps_3.0
## [13] ggsignif_0.5.0 labeling_0.3 RgoogleMaps_1.4.5.3
## [16] GenomeInfoDbData_1.2.0 hwriter_1.3.2 bit64_0.9-7
## [19] rhdf5_2.26.2 vctrs_0.2.0 generics_0.0.2
## [22] lambda.r_1.2.3 xfun_0.11 R6_2.4.1
## [25] locfit_1.5-9.1 bitops_1.0-6 assertthat_0.2.1
## [28] promises_1.1.0 nnet_7.3-12 gtable_0.3.0
## [31] rlang_0.4.1 zeallot_0.1.0 genefilter_1.64.0
## [34] scatterplot3d_0.3-41 splines_3.5.0 lazyeval_0.2.2
## [37] acepack_1.4.1 broom_0.5.2 checkmate_1.9.3
## [40] yaml_2.2.0 modelr_0.1.4 crosstalk_1.0.0
## [43] backports_1.1.5 httpuv_1.5.2 tools_3.5.0
## [46] base64enc_0.1-3 zlibbioc_1.28.0 classInt_0.3-3
## [49] RCurl_1.95-4.12 rpart_4.1-15 ggmap_3.0.0
## [52] haven_2.2.0 futile.options_1.0.1 hms_0.5.2
## [55] mime_0.7 evaluate_0.14 xtable_1.8-4
## [58] XML_3.98-1.20 jpeg_0.1-8.1 readxl_1.3.1
## [61] compiler_3.5.0 KernSmooth_2.23-15 crayon_1.3.4
## [64] htmltools_0.4.0 mgcv_1.8-28 later_1.0.0
## [67] geneplotter_1.60.0 expm_0.999-4 RcppParallel_4.4.2
## [70] lubridate_1.7.4 DBI_1.0.0 formatR_1.6
## [73] matlab_1.0.2 MASS_7.3-51.4 sf_0.7-4
## [76] ShortRead_1.40.0 Matrix_1.2-17 ade4_1.7-13
## [79] cli_1.1.0 gdata_2.18.0 pkgconfig_2.0.3
## [82] flashClust_1.01-2 GenomicAlignments_1.18.1 foreign_0.8-71
## [85] sp_1.3-2 xml2_1.2.2 foreach_1.4.4
## [88] annotate_1.60.1 multtest_2.38.0 XVector_0.22.0
## [91] rvest_0.3.5 digest_0.6.22 Biostrings_2.50.2
## [94] rmarkdown_1.17 cellranger_1.1.0 htmlTable_1.13.1
## [97] gtools_3.8.1 shiny_1.4.0 Rsamtools_1.34.1
## [100] rjson_0.2.20 lifecycle_0.1.0 nlme_3.1-140
## [103] jsonlite_1.6 Rhdf5lib_1.4.3 viridisLite_0.3.0
## [106] pillar_1.4.2 fastmap_1.0.1 httr_1.4.1
## [109] glue_1.3.1 png_0.1-7 iterators_1.0.10
## [112] bit_1.1-14 class_7.3-15 blob_1.1.1
## [115] caTools_1.17.1.2 latticeExtra_0.6-28 memoise_1.1.0
## [118] e1071_1.7-2